/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    BVDecomposeSegCVSub.java
 *    Copyright (C) 2003 Paul Conilione
 *
 *    Based on the class: BVDecompose.java by Len Trigg (1999)
 */

/*
 *    DEDICATION
 *
 *    Paul Conilione would like to express his deep gratitude and appreciation
 *    to his Chinese Buddhist Taoist Master Sifu Chow Yuk Nen for the abilities
 *    and insight that he has been taught, which have allowed him to program in 
 *    a clear and efficient manner.
 *
 *    Master Sifu Chow Yuk Nen's Teachings are unique and precious. They are
 *    applicable to any field of human endeavour. Through his unique and powerful
 *    ability to skilfully apply Chinese Buddhist Teachings, people have achieved
 *    success in; Computing, chemical engineering, business, accounting, philosophy
 *    and more.
 *
 */

package weka.classifiers;

import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.Reader;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

/**
 * <!-- globalinfo-start --> This class performs Bias-Variance decomposion on
 * any classifier using the sub-sampled cross-validation procedure as specified
 * in (1).<br/>
 * The Kohavi and Wolpert definition of bias and variance is specified in (2).<br/>
 * The Webb definition of bias and variance is specified in (3).<br/>
 * <br/>
 * Geoffrey I. Webb, Paul Conilione (2002). Estimating bias and variance from
 * data. School of Computer Science and Software Engineering, Victoria,
 * Australia.<br/>
 * <br/>
 * Ron Kohavi, David H. Wolpert: Bias Plus Variance Decomposition for Zero-One
 * Loss Functions. In: Machine Learning: Proceedings of the Thirteenth
 * International Conference, 275-283, 1996.<br/>
 * <br/>
 * Geoffrey I. Webb (2000). MultiBoosting: A Technique for Combining Boosting
 * and Wagging. Machine Learning. 40(2):159-196.
 * <p/>
 * <!-- globalinfo-end -->
 * 
 * <!-- technical-bibtex-start --> BibTeX:
 * 
 * <pre>
 * &#64;misc{Webb2002,
 *    address = {School of Computer Science and Software Engineering, Victoria, Australia},
 *    author = {Geoffrey I. Webb and Paul Conilione},
 *    institution = {Monash University},
 *    title = {Estimating bias and variance from data},
 *    year = {2002},
 *    PDF = {http://www.csse.monash.edu.au/\~webb/Files/WebbConilione04.pdf}
 * }
 * 
 * &#64;inproceedings{Kohavi1996,
 *    author = {Ron Kohavi and David H. Wolpert},
 *    booktitle = {Machine Learning: Proceedings of the Thirteenth International Conference},
 *    editor = {Lorenza Saitta},
 *    pages = {275-283},
 *    publisher = {Morgan Kaufmann},
 *    title = {Bias Plus Variance Decomposition for Zero-One Loss Functions},
 *    year = {1996},
 *    PS = {http://robotics.stanford.edu/\~ronnyk/biasVar.ps}
 * }
 * 
 * &#64;article{Webb2000,
 *    author = {Geoffrey I. Webb},
 *    journal = {Machine Learning},
 *    number = {2},
 *    pages = {159-196},
 *    title = {MultiBoosting: A Technique for Combining Boosting and Wagging},
 *    volume = {40},
 *    year = {2000}
 * }
 * </pre>
 * <p/>
 * <!-- technical-bibtex-end -->
 * 
 * <!-- options-start --> Valid options are:
 * <p/>
 * 
 * <pre>
 * -c &lt;class index&gt;
 *  The index of the class attribute.
 *  (default last)
 * </pre>
 * 
 * <pre>
 * -D
 *  Turn on debugging output.
 * </pre>
 * 
 * <pre>
 * -l &lt;num&gt;
 *  The number of times each instance is classified.
 *  (default 10)
 * </pre>
 * 
 * <pre>
 * -p &lt;proportion of objects in common&gt;
 *  The average proportion of instances common between any two training sets
 * </pre>
 * 
 * <pre>
 * -s &lt;seed&gt;
 *  The random number seed used.
 * </pre>
 * 
 * <pre>
 * -t &lt;name of arff file&gt;
 *  The name of the arff file used for the decomposition.
 * </pre>
 * 
 * <pre>
 * -T &lt;number of instances in training set&gt;
 *  The number of instances in the training set.
 * </pre>
 * 
 * <pre>
 * -W &lt;classifier class name&gt;
 *  Full class name of the learner used in the decomposition.
 *  eg: weka.classifiers.bayes.NaiveBayes
 * </pre>
 * 
 * <pre>
 * Options specific to learner weka.classifiers.rules.ZeroR:
 * </pre>
 * 
 * <pre>
 * -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console
 * </pre>
 * 
 * <!-- options-end -->
 * 
 * Options after -- are passed to the designated sub-learner.
 * <p>
 * 
 * @author Paul Conilione (paulc4321@yahoo.com.au)
 * @version $Revision: 1.7 $
 */
public class BVDecomposeSegCVSub implements OptionHandler,
		TechnicalInformationHandler, RevisionHandler {

	/** Debugging mode, gives extra output if true. */
	protected boolean m_Debug;

	/** An instantiated base classifier used for getting and testing options. */
	protected Classifier m_Classifier = new weka.classifiers.rules.ZeroR();

	/** The options to be passed to the base classifier. */
	protected String[] m_ClassifierOptions;

	/** The number of times an instance is classified */
	protected int m_ClassifyIterations;

	/** The name of the data file used for the decomposition */
	protected String m_DataFileName;

	/** The index of the class attribute */
	protected int m_ClassIndex = -1;

	/** The random number seed */
	protected int m_Seed = 1;

	/** The calculated Kohavi & Wolpert bias (squared) */
	protected double m_KWBias;

	/** The calculated Kohavi & Wolpert variance */
	protected double m_KWVariance;

	/** The calculated Kohavi & Wolpert sigma */
	protected double m_KWSigma;

	/** The calculated Webb bias */
	protected double m_WBias;

	/** The calculated Webb variance */
	protected double m_WVariance;

	/** The error rate */
	protected double m_Error;

	/** The training set size */
	protected int m_TrainSize;

	/** Proportion of instances common between any two training sets. */
	protected double m_P;

	/**
	 * Returns a string describing this object
	 * 
	 * @return a description of the classifier suitable for displaying in the
	 *         explorer/experimenter gui
	 */
	public String globalInfo() {
		return "This class performs Bias-Variance decomposion on any classifier using the "
				+ "sub-sampled cross-validation procedure as specified in (1).\n"
				+ "The Kohavi and Wolpert definition of bias and variance is specified in (2).\n"
				+ "The Webb definition of bias and variance is specified in (3).\n\n"
				+ getTechnicalInformation().toString();
	}

	/**
	 * Returns an instance of a TechnicalInformation object, containing detailed
	 * information about the technical background of this class, e.g., paper
	 * reference or book this class is based on.
	 * 
	 * @return the technical information about this class
	 */
	public TechnicalInformation getTechnicalInformation() {
		TechnicalInformation result;
		TechnicalInformation additional;

		result = new TechnicalInformation(Type.MISC);
		result.setValue(Field.AUTHOR, "Geoffrey I. Webb and Paul Conilione");
		result.setValue(Field.YEAR, "2002");
		result.setValue(Field.TITLE, "Estimating bias and variance from data");
		result.setValue(Field.INSTITUTION, "Monash University");
		result.setValue(Field.ADDRESS,
				"School of Computer Science and Software Engineering, Victoria, Australia");
		result.setValue(Field.PDF,
				"http://www.csse.monash.edu.au/~webb/Files/WebbConilione04.pdf");

		additional = result.add(Type.INPROCEEDINGS);
		additional.setValue(Field.AUTHOR, "Ron Kohavi and David H. Wolpert");
		additional.setValue(Field.YEAR, "1996");
		additional.setValue(Field.TITLE,
				"Bias Plus Variance Decomposition for Zero-One Loss Functions");
		additional
				.setValue(Field.BOOKTITLE,
						"Machine Learning: Proceedings of the Thirteenth International Conference");
		additional.setValue(Field.PUBLISHER, "Morgan Kaufmann");
		additional.setValue(Field.EDITOR, "Lorenza Saitta");
		additional.setValue(Field.PAGES, "275-283");
		additional.setValue(Field.PS,
				"http://robotics.stanford.edu/~ronnyk/biasVar.ps");

		additional = result.add(Type.ARTICLE);
		additional.setValue(Field.AUTHOR, "Geoffrey I. Webb");
		additional.setValue(Field.YEAR, "2000");
		additional
				.setValue(Field.TITLE,
						"MultiBoosting: A Technique for Combining Boosting and Wagging");
		additional.setValue(Field.JOURNAL, "Machine Learning");
		additional.setValue(Field.VOLUME, "40");
		additional.setValue(Field.NUMBER, "2");
		additional.setValue(Field.PAGES, "159-196");

		return result;
	}

	/**
	 * Returns an enumeration describing the available options.
	 * 
	 * @return an enumeration of all the available options.
	 */
	public Enumeration listOptions() {

		Vector newVector = new Vector(8);

		newVector.addElement(new Option("\tThe index of the class attribute.\n"
				+ "\t(default last)", "c", 1, "-c <class index>"));
		newVector.addElement(new Option("\tTurn on debugging output.", "D", 0,
				"-D"));
		newVector.addElement(new Option(
				"\tThe number of times each instance is classified.\n"
						+ "\t(default 10)", "l", 1, "-l <num>"));
		newVector
				.addElement(new Option(
						"\tThe average proportion of instances common between any two training sets",
						"p", 1, "-p <proportion of objects in common>"));
		newVector.addElement(new Option("\tThe random number seed used.", "s",
				1, "-s <seed>"));
		newVector.addElement(new Option(
				"\tThe name of the arff file used for the decomposition.", "t",
				1, "-t <name of arff file>"));
		newVector.addElement(new Option(
				"\tThe number of instances in the training set.", "T", 1,
				"-T <number of instances in training set>"));
		newVector.addElement(new Option(
				"\tFull class name of the learner used in the decomposition.\n"
						+ "\teg: weka.classifiers.bayes.NaiveBayes", "W", 1,
				"-W <classifier class name>"));

		if ((m_Classifier != null) && (m_Classifier instanceof OptionHandler)) {
			newVector.addElement(new Option("", "", 0,
					"\nOptions specific to learner "
							+ m_Classifier.getClass().getName() + ":"));
			Enumeration enu = ((OptionHandler) m_Classifier).listOptions();
			while (enu.hasMoreElements()) {
				newVector.addElement(enu.nextElement());
			}
		}
		return newVector.elements();
	}

	/**
	 * Sets the OptionHandler's options using the given list. All options will
	 * be set (or reset) during this call (i.e. incremental setting of options
	 * is not possible).
	 * <p/>
	 * 
	 * <!-- options-start --> Valid options are:
	 * <p/>
	 * 
	 * <pre>
	 * -c &lt;class index&gt;
	 *  The index of the class attribute.
	 *  (default last)
	 * </pre>
	 * 
	 * <pre>
	 * -D
	 *  Turn on debugging output.
	 * </pre>
	 * 
	 * <pre>
	 * -l &lt;num&gt;
	 *  The number of times each instance is classified.
	 *  (default 10)
	 * </pre>
	 * 
	 * <pre>
	 * -p &lt;proportion of objects in common&gt;
	 *  The average proportion of instances common between any two training sets
	 * </pre>
	 * 
	 * <pre>
	 * -s &lt;seed&gt;
	 *  The random number seed used.
	 * </pre>
	 * 
	 * <pre>
	 * -t &lt;name of arff file&gt;
	 *  The name of the arff file used for the decomposition.
	 * </pre>
	 * 
	 * <pre>
	 * -T &lt;number of instances in training set&gt;
	 *  The number of instances in the training set.
	 * </pre>
	 * 
	 * <pre>
	 * -W &lt;classifier class name&gt;
	 *  Full class name of the learner used in the decomposition.
	 *  eg: weka.classifiers.bayes.NaiveBayes
	 * </pre>
	 * 
	 * <pre>
	 * Options specific to learner weka.classifiers.rules.ZeroR:
	 * </pre>
	 * 
	 * <pre>
	 * -D
	 *  If set, classifier is run in debug mode and
	 *  may output additional info to the console
	 * </pre>
	 * 
	 * <!-- options-end -->
	 * 
	 * @param options
	 *            the list of options as an array of strings
	 * @throws Exception
	 *             if an option is not supported
	 */
	public void setOptions(String[] options) throws Exception {
		setDebug(Utils.getFlag('D', options));

		String classIndex = Utils.getOption('c', options);
		if (classIndex.length() != 0) {
			if (classIndex.toLowerCase().equals("last")) {
				setClassIndex(0);
			} else if (classIndex.toLowerCase().equals("first")) {
				setClassIndex(1);
			} else {
				setClassIndex(Integer.parseInt(classIndex));
			}
		} else {
			setClassIndex(0);
		}

		String classifyIterations = Utils.getOption('l', options);
		if (classifyIterations.length() != 0) {
			setClassifyIterations(Integer.parseInt(classifyIterations));
		} else {
			setClassifyIterations(10);
		}

		String prob = Utils.getOption('p', options);
		if (prob.length() != 0) {
			setP(Double.parseDouble(prob));
		} else {
			setP(-1);
		}
		// throw new Exception("A proportion must be specified" +
		// " with a -p option.");

		String seedString = Utils.getOption('s', options);
		if (seedString.length() != 0) {
			setSeed(Integer.parseInt(seedString));
		} else {
			setSeed(1);
		}

		String dataFile = Utils.getOption('t', options);
		if (dataFile.length() != 0) {
			setDataFileName(dataFile);
		} else {
			throw new Exception("An arff file must be specified"
					+ " with the -t option.");
		}

		String trainSize = Utils.getOption('T', options);
		if (trainSize.length() != 0) {
			setTrainSize(Integer.parseInt(trainSize));
		} else {
			setTrainSize(-1);
		}
		// throw new Exception("A training set size must be specified" +
		// " with a -T option.");

		String classifierName = Utils.getOption('W', options);
		if (classifierName.length() != 0) {
			setClassifier(Classifier.forName(classifierName,
					Utils.partitionOptions(options)));
		} else {
			throw new Exception(
					"A learner must be specified with the -W option.");
		}
	}

	/**
	 * Gets the current settings of the CheckClassifier.
	 * 
	 * @return an array of strings suitable for passing to setOptions
	 */
	public String[] getOptions() {

		String[] classifierOptions = new String[0];
		if ((m_Classifier != null) && (m_Classifier instanceof OptionHandler)) {
			classifierOptions = ((OptionHandler) m_Classifier).getOptions();
		}
		String[] options = new String[classifierOptions.length + 14];
		int current = 0;
		if (getDebug()) {
			options[current++] = "-D";
		}
		options[current++] = "-c";
		options[current++] = "" + getClassIndex();
		options[current++] = "-l";
		options[current++] = "" + getClassifyIterations();
		options[current++] = "-p";
		options[current++] = "" + getP();
		options[current++] = "-s";
		options[current++] = "" + getSeed();
		if (getDataFileName() != null) {
			options[current++] = "-t";
			options[current++] = "" + getDataFileName();
		}
		options[current++] = "-T";
		options[current++] = "" + getTrainSize();
		if (getClassifier() != null) {
			options[current++] = "-W";
			options[current++] = getClassifier().getClass().getName();
		}

		options[current++] = "--";
		System.arraycopy(classifierOptions, 0, options, current,
				classifierOptions.length);
		current += classifierOptions.length;
		while (current < options.length) {
			options[current++] = "";
		}
		return options;
	}

	/**
	 * Set the classifiers being analysed
	 * 
	 * @param newClassifier
	 *            the Classifier to use.
	 */
	public void setClassifier(Classifier newClassifier) {

		m_Classifier = newClassifier;
	}

	/**
	 * Gets the name of the classifier being analysed
	 * 
	 * @return the classifier being analysed.
	 */
	public Classifier getClassifier() {

		return m_Classifier;
	}

	/**
	 * Sets debugging mode
	 * 
	 * @param debug
	 *            true if debug output should be printed
	 */
	public void setDebug(boolean debug) {

		m_Debug = debug;
	}

	/**
	 * Gets whether debugging is turned on
	 * 
	 * @return true if debugging output is on
	 */
	public boolean getDebug() {

		return m_Debug;
	}

	/**
	 * Sets the random number seed
	 * 
	 * @param seed
	 *            the random number seed
	 */
	public void setSeed(int seed) {

		m_Seed = seed;
	}

	/**
	 * Gets the random number seed
	 * 
	 * @return the random number seed
	 */
	public int getSeed() {

		return m_Seed;
	}

	/**
	 * Sets the number of times an instance is classified
	 * 
	 * @param classifyIterations
	 *            number of times an instance is classified
	 */
	public void setClassifyIterations(int classifyIterations) {

		m_ClassifyIterations = classifyIterations;
	}

	/**
	 * Gets the number of times an instance is classified
	 * 
	 * @return the maximum number of times an instance is classified
	 */
	public int getClassifyIterations() {

		return m_ClassifyIterations;
	}

	/**
	 * Sets the name of the dataset file.
	 * 
	 * @param dataFileName
	 *            name of dataset file.
	 */
	public void setDataFileName(String dataFileName) {

		m_DataFileName = dataFileName;
	}

	/**
	 * Get the name of the data file used for the decomposition
	 * 
	 * @return the name of the data file
	 */
	public String getDataFileName() {

		return m_DataFileName;
	}

	/**
	 * Get the index (starting from 1) of the attribute used as the class.
	 * 
	 * @return the index of the class attribute
	 */
	public int getClassIndex() {

		return m_ClassIndex + 1;
	}

	/**
	 * Sets index of attribute to discretize on
	 * 
	 * @param classIndex
	 *            the index (starting from 1) of the class attribute
	 */
	public void setClassIndex(int classIndex) {

		m_ClassIndex = classIndex - 1;
	}

	/**
	 * Get the calculated bias squared according to the Kohavi and Wolpert
	 * definition
	 * 
	 * @return the bias squared
	 */
	public double getKWBias() {

		return m_KWBias;
	}

	/**
	 * Get the calculated bias according to the Webb definition
	 * 
	 * @return the bias
	 * 
	 */
	public double getWBias() {

		return m_WBias;
	}

	/**
	 * Get the calculated variance according to the Kohavi and Wolpert
	 * definition
	 * 
	 * @return the variance
	 */
	public double getKWVariance() {

		return m_KWVariance;
	}

	/**
	 * Get the calculated variance according to the Webb definition
	 * 
	 * @return the variance according to Webb
	 * 
	 */
	public double getWVariance() {

		return m_WVariance;
	}

	/**
	 * Get the calculated sigma according to the Kohavi and Wolpert definition
	 * 
	 * @return the sigma
	 * 
	 */
	public double getKWSigma() {

		return m_KWSigma;
	}

	/**
	 * Set the training size.
	 * 
	 * @param size
	 *            the size of the training set
	 * 
	 */
	public void setTrainSize(int size) {

		m_TrainSize = size;
	}

	/**
	 * Get the training size
	 * 
	 * @return the size of the training set
	 * 
	 */
	public int getTrainSize() {

		return m_TrainSize;
	}

	/**
	 * Set the proportion of instances that are common between two training sets
	 * used to train a classifier.
	 * 
	 * @param proportion
	 *            the proportion of instances that are common between training
	 *            sets.
	 * 
	 */
	public void setP(double proportion) {

		m_P = proportion;
	}

	/**
	 * Get the proportion of instances that are common between two training
	 * sets.
	 * 
	 * @return the proportion
	 * 
	 */
	public double getP() {

		return m_P;
	}

	/**
	 * Get the calculated error rate
	 * 
	 * @return the error rate
	 */
	public double getError() {

		return m_Error;
	}

	/**
	 * Carry out the bias-variance decomposition using the sub-sampled
	 * cross-validation method.
	 * 
	 * @throws Exception
	 *             if the decomposition couldn't be carried out
	 */
	public void decompose() throws Exception {

		Reader dataReader;
		Instances data;

		int tps; // training pool size, size of segment E.
		int k; // number of folds in segment E.
		int q; // number of segments of size tps.

		dataReader = new BufferedReader(new FileReader(m_DataFileName)); // open
																			// file
		data = new Instances(dataReader); // encapsulate in wrapper class called
											// weka.Instances()

		if (m_ClassIndex < 0) {
			data.setClassIndex(data.numAttributes() - 1);
		} else {
			data.setClassIndex(m_ClassIndex);
		}

		if (data.classAttribute().type() != Attribute.NOMINAL) {
			throw new Exception("Class attribute must be nominal");
		}
		int numClasses = data.numClasses();

		data.deleteWithMissingClass();
		if (data.checkForStringAttributes()) {
			throw new Exception("Can't handle string attributes!");
		}

		// Dataset size must be greater than 2
		if (data.numInstances() <= 2) {
			throw new Exception("Dataset size must be greater than 2.");
		}

		if (m_TrainSize == -1) { // default value
			m_TrainSize = (int) Math.floor((double) data.numInstances() / 2.0);
		} else if (m_TrainSize < 0 || m_TrainSize >= data.numInstances() - 1) { // Check
																				// if
																				// 0
																				// <
																				// training
																				// Size
																				// <
																				// D
																				// -
																				// 1
			throw new Exception("Training set size of " + m_TrainSize
					+ " is invalid.");
		}

		if (m_P == -1) { // default value
			m_P = (double) m_TrainSize / ((double) data.numInstances() - 1);
		} else if (m_P < (m_TrainSize / ((double) data.numInstances() - 1))
				|| m_P >= 1.0) { // Check if p is in range: m/(|D|-1) <= p < 1.0
			throw new Exception("Proportion is not in range: "
					+ (m_TrainSize / ((double) data.numInstances() - 1))
					+ " <= p < 1.0 ");
		}

		// roundup tps from double to integer
		tps = (int) Math.ceil(((double) m_TrainSize / (double) m_P) + 1);
		k = (int) Math.ceil(tps / (tps - (double) m_TrainSize));

		// number of folds cannot be more than the number of instances in the
		// training pool
		if (k > tps) {
			throw new Exception("The required number of folds is too many."
					+ "Change p or the size of the training set.");
		}

		// calculate the number of segments, round down.
		q = (int) Math.floor((double) data.numInstances() / (double) tps);

		// create confusion matrix, columns = number of instances in data set,
		// as all will be used, by rows = number of classes.
		double[][] instanceProbs = new double[data.numInstances()][numClasses];
		int[][] foldIndex = new int[k][2];
		Vector segmentList = new Vector(q + 1);

		// Set random seed
		Random random = new Random(m_Seed);

		data.randomize(random);

		// create index arrays for different segments

		int currentDataIndex = 0;

		for (int count = 1; count <= (q + 1); count++) {
			if (count > q) {
				int[] segmentIndex = new int[(data.numInstances() - (q * tps))];
				for (int index = 0; index < segmentIndex.length; index++, currentDataIndex++) {

					segmentIndex[index] = currentDataIndex;
				}
				segmentList.add(segmentIndex);
			} else {
				int[] segmentIndex = new int[tps];

				for (int index = 0; index < segmentIndex.length; index++, currentDataIndex++) {
					segmentIndex[index] = currentDataIndex;
				}
				segmentList.add(segmentIndex);
			}
		}

		int remainder = tps % k; // remainder is used to determine when to
									// shrink the fold size by 1.

		// foldSize = ROUNDUP( tps / k ) (round up, eg 3 -> 3, 3.3->4)
		int foldSize = (int) Math.ceil((double) tps / (double) k); // roundup
																	// fold size
																	// double to
																	// integer
		int index = 0;
		int currentIndex;

		for (int count = 0; count < k; count++) {
			if (remainder != 0 && count == remainder) {
				foldSize -= 1;
			}
			foldIndex[count][0] = index;
			foldIndex[count][1] = foldSize;
			index += foldSize;
		}

		for (int l = 0; l < m_ClassifyIterations; l++) {

			for (int i = 1; i <= q; i++) {

				int[] currentSegment = (int[]) segmentList.get(i - 1);

				randomize(currentSegment, random);

				// CROSS FOLD VALIDATION for current Segment
				for (int j = 1; j <= k; j++) {

					Instances TP = null;
					for (int foldNum = 1; foldNum <= k; foldNum++) {
						if (foldNum != j) {

							int startFoldIndex = foldIndex[foldNum - 1][0]; // start
																			// index
							foldSize = foldIndex[foldNum - 1][1];
							int endFoldIndex = startFoldIndex + foldSize - 1;

							for (int currentFoldIndex = startFoldIndex; currentFoldIndex <= endFoldIndex; currentFoldIndex++) {

								if (TP == null) {
									TP = new Instances(data,
											currentSegment[currentFoldIndex], 1);
								} else {
									TP.add(data
											.instance(currentSegment[currentFoldIndex]));
								}
							}
						}
					}

					TP.randomize(random);

					if (getTrainSize() > TP.numInstances()) {
						throw new Exception("The training set size of "
								+ getTrainSize()
								+ ", is greater than the training pool "
								+ TP.numInstances());
					}

					Instances train = new Instances(TP, 0, m_TrainSize);

					Classifier current = Classifier.makeCopy(m_Classifier);
					current.buildClassifier(train); // create a clssifier using
													// the instances in train.

					int currentTestIndex = foldIndex[j - 1][0]; // start index
					int testFoldSize = foldIndex[j - 1][1]; // size
					int endTestIndex = currentTestIndex + testFoldSize - 1;

					while (currentTestIndex <= endTestIndex) {

						Instance testInst = data
								.instance(currentSegment[currentTestIndex]);
						int pred = (int) current.classifyInstance(testInst);

						if (pred != testInst.classValue()) {
							m_Error++; // add 1 to mis-classifications.
						}
						instanceProbs[currentSegment[currentTestIndex]][pred]++;
						currentTestIndex++;
					}

					if (i == 1 && j == 1) {
						int[] segmentElast = (int[]) segmentList.lastElement();
						for (currentIndex = 0; currentIndex < segmentElast.length; currentIndex++) {
							Instance testInst = data
									.instance(segmentElast[currentIndex]);
							int pred = (int) current.classifyInstance(testInst);
							if (pred != testInst.classValue()) {
								m_Error++; // add 1 to mis-classifications.
							}

							instanceProbs[segmentElast[currentIndex]][pred]++;
						}
					}
				}
			}
		}

		m_Error /= (double) (m_ClassifyIterations * data.numInstances());

		m_KWBias = 0.0;
		m_KWVariance = 0.0;
		m_KWSigma = 0.0;

		m_WBias = 0.0;
		m_WVariance = 0.0;

		for (int i = 0; i < data.numInstances(); i++) {

			Instance current = data.instance(i);

			double[] predProbs = instanceProbs[i];
			double pActual, pPred;
			double bsum = 0, vsum = 0, ssum = 0;
			double wBSum = 0, wVSum = 0;

			Vector centralTendencies = findCentralTendencies(predProbs);

			if (centralTendencies == null) {
				throw new Exception("Central tendency was null.");
			}

			for (int j = 0; j < numClasses; j++) {
				pActual = (current.classValue() == j) ? 1 : 0;
				pPred = predProbs[j] / m_ClassifyIterations;
				bsum += (pActual - pPred) * (pActual - pPred) - pPred
						* (1 - pPred) / (m_ClassifyIterations - 1);
				vsum += pPred * pPred;
				ssum += pActual * pActual;
			}

			m_KWBias += bsum;
			m_KWVariance += (1 - vsum);
			m_KWSigma += (1 - ssum);

			for (int count = 0; count < centralTendencies.size(); count++) {

				int wB = 0, wV = 0;
				int centralTendency = ((Integer) centralTendencies.get(count))
						.intValue();

				// For a single instance xi, find the bias and variance.
				for (int j = 0; j < numClasses; j++) {

					// Webb definition
					if (j != (int) current.classValue() && j == centralTendency) {
						wB += predProbs[j];
					}
					if (j != (int) current.classValue() && j != centralTendency) {
						wV += predProbs[j];
					}

				}
				wBSum += (double) wB;
				wVSum += (double) wV;
			}

			// calculate bais by dividing bSum by the number of central
			// tendencies and
			// total number of instances. (effectively finding the average and
			// dividing
			// by the number of instances to get the nominalised probability).

			m_WBias += (wBSum / ((double) (centralTendencies.size() * m_ClassifyIterations)));
			// calculate variance by dividing vSum by the total number of
			// interations
			m_WVariance += (wVSum / ((double) (centralTendencies.size() * m_ClassifyIterations)));

		}

		m_KWBias /= (2.0 * (double) data.numInstances());
		m_KWVariance /= (2.0 * (double) data.numInstances());
		m_KWSigma /= (2.0 * (double) data.numInstances());

		// bias = bias / number of data instances
		m_WBias /= (double) data.numInstances();
		// variance = variance / number of data instances.
		m_WVariance /= (double) data.numInstances();

		if (m_Debug) {
			System.err.println("Decomposition finished");
		}

	}

	/**
	 * Finds the central tendency, given the classifications for an instance.
	 * 
	 * Where the central tendency is defined as the class that was most commonly
	 * selected for a given instance.
	 * <p>
	 * 
	 * For example, instance 'x' may be classified out of 3 classes y = {1, 2,
	 * 3}, so if x is classified 10 times, and is classified as follows, '1' = 2
	 * times, '2' = 5 times and '3' = 3 times. Then the central tendency is '2'.
	 * <p>
	 * 
	 * However, it is important to note that this method returns a list of all
	 * classes that have the highest number of classifications.
	 * 
	 * In cases where there are several classes with the largest number of
	 * classifications, then all of these classes are returned. For example if
	 * 'x' is classified '1' = 4 times, '2' = 4 times and '3' = 2 times. Then
	 * '1' and '2' are returned.
	 * <p>
	 * 
	 * @param predProbs
	 *            the array of classifications for a single instance.
	 * 
	 * @return a Vector containing Integer objects which store the class(s)
	 *         which are the central tendency.
	 */
	public Vector findCentralTendencies(double[] predProbs) {

		int centralTValue = 0;
		int currentValue = 0;
		// array to store the list of classes the have the greatest number of
		// classifictions.
		Vector centralTClasses;

		centralTClasses = new Vector(); // create an array with size of the
										// number of classes.

		// Go through array, finding the central tendency.
		for (int i = 0; i < predProbs.length; i++) {
			currentValue = (int) predProbs[i];
			// if current value is greater than the central tendency value then
			// clear vector and add new class to vector array.
			if (currentValue > centralTValue) {
				centralTClasses.clear();
				centralTClasses.addElement(new Integer(i));
				centralTValue = currentValue;
			} else if (currentValue != 0 && currentValue == centralTValue) {
				centralTClasses.addElement(new Integer(i));
			}
		}
		// return all classes that have the greatest number of classifications.
		if (centralTValue != 0) {
			return centralTClasses;
		} else {
			return null;
		}

	}

	/**
	 * Returns description of the bias-variance decomposition results.
	 * 
	 * @return the bias-variance decomposition results as a string
	 */
	public String toString() {

		String result = "\nBias-Variance Decomposition Segmentation, Cross Validation\n"
				+ "with subsampling.\n";

		if (getClassifier() == null) {
			return "Invalid setup";
		}

		result += "\nClassifier    : " + getClassifier().getClass().getName();
		if (getClassifier() instanceof OptionHandler) {
			result += Utils.joinOptions(((OptionHandler) m_Classifier)
					.getOptions());
		}
		result += "\nData File     : " + getDataFileName();
		result += "\nClass Index   : ";
		if (getClassIndex() == 0) {
			result += "last";
		} else {
			result += getClassIndex();
		}
		result += "\nIterations    : " + getClassifyIterations();
		result += "\np             : " + getP();
		result += "\nTraining Size : " + getTrainSize();
		result += "\nSeed          : " + getSeed();

		result += "\n\nDefinition   : " + "Kohavi and Wolpert";
		result += "\nError         :" + Utils.doubleToString(getError(), 4);
		result += "\nBias^2        :" + Utils.doubleToString(getKWBias(), 4);
		result += "\nVariance      :"
				+ Utils.doubleToString(getKWVariance(), 4);
		result += "\nSigma^2       :" + Utils.doubleToString(getKWSigma(), 4);

		result += "\n\nDefinition   : " + "Webb";
		result += "\nError         :" + Utils.doubleToString(getError(), 4);
		result += "\nBias          :" + Utils.doubleToString(getWBias(), 4);
		result += "\nVariance      :" + Utils.doubleToString(getWVariance(), 4);

		return result;
	}

	/**
	 * Returns the revision string.
	 * 
	 * @return the revision
	 */
	public String getRevision() {
		return RevisionUtils.extract("$Revision: 1.7 $");
	}

	/**
	 * Test method for this class
	 * 
	 * @param args
	 *            the command line arguments
	 */
	public static void main(String[] args) {

		try {
			BVDecomposeSegCVSub bvd = new BVDecomposeSegCVSub();

			try {
				bvd.setOptions(args);
				Utils.checkForRemainingOptions(args);
			} catch (Exception ex) {
				String result = ex.getMessage() + "\nBVDecompose Options:\n\n";
				Enumeration enu = bvd.listOptions();
				while (enu.hasMoreElements()) {
					Option option = (Option) enu.nextElement();
					result += option.synopsis() + "\n" + option.description()
							+ "\n";
				}
				throw new Exception(result);
			}

			bvd.decompose();

			System.out.println(bvd.toString());

		} catch (Exception ex) {
			System.err.println(ex.getMessage());
		}

	}

	/**
	 * Accepts an array of ints and randomises the values in the array, using
	 * the random seed.
	 * 
	 * @param index
	 *            is the array of integers
	 * @param random
	 *            is the Random seed.
	 */
	public final void randomize(int[] index, Random random) {
		for (int j = index.length - 1; j > 0; j--) {
			int k = random.nextInt(j + 1);
			int temp = index[j];
			index[j] = index[k];
			index[k] = temp;
		}
	}
}
